{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3a0af134",
   "metadata": {},
   "source": [
    "# How to get report data in CSV format?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e68b6239",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.datasets import fetch_openml\n",
    "\n",
    "from evidently.report import Report\n",
    "from evidently.metrics.base_metric import generate_column_metrics\n",
    "from evidently.metrics import ColumnSummaryMetric, ColumnMissingValuesMetric\n",
    "\n",
    "from evidently.test_suite import TestSuite\n",
    "from evidently.tests import TestNumberOfColumns\n",
    "from evidently.tests import TestColumnsType\n",
    "from evidently.tests import TestNumberOfEmptyRows\n",
    "from evidently.tests import TestNumberOfEmptyColumns\n",
    "from evidently.tests import TestNumberOfDuplicatedRows\n",
    "from evidently.tests import TestNumberOfDuplicatedColumns\n",
    "\n",
    "from evidently import ColumnMapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "8dfa9056",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the OpenML bunch under its own name: later cells rebind `data` to a scratch\n",
    "# dict/list, so sharing that name would silently replace the loaded dataset.\n",
    "adult = fetch_openml(name='adult', version=2, as_frame='auto')\n",
    "\n",
    "# Two consecutive, non-overlapping 10,000-row windows of the same frame.\n",
    "reference = adult.frame.iloc[:10000]\n",
    "current = adult.frame.iloc[10000:20000]\n",
    "\n",
    "columns = ColumnMapping(\n",
    "    target='class',\n",
    "    numerical_features=['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'],\n",
    "    categorical_features=['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "989dd40c",
   "metadata": {},
   "source": [
    "## Column Summary to CSV through a pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b18416bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report made of ColumnSummaryMetric, requested for columns='all',\n",
    "# computed for `current` with `reference` as the comparison dataset.\n",
    "column_summary = Report(\n",
    "    metrics=[generate_column_metrics(ColumnSummaryMetric, columns='all')]\n",
    ")\n",
    "\n",
    "column_summary.run(\n",
    "    reference_data=reference,\n",
    "    current_data=current,\n",
    "    column_mapping=columns,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "90eb2ae5",
   "metadata": {},
   "outputs": [],
   "source": [
    "column_summary_dict = column_summary.as_dict()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "9cf034c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten every metric result into one row keyed by its column name:\n",
    "# reference values get a ref_ prefix, current values a cur_ prefix.\n",
    "data = {}\n",
    "for result in column_summary_dict['metrics']:\n",
    "    data[result['result']['column_name']] = {\n",
    "        f\"{prefix}_{key}\": val\n",
    "        for prefix, section in (('ref', 'reference_characteristics'), ('cur', 'current_characteristics'))\n",
    "        for key, val in result['result'][section].items()\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "3e6732c4",
   "metadata": {},
   "outputs": [],
   "source": [
    "column_summary_frame = pd.DataFrame.from_dict(data, orient='index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6af81a93",
   "metadata": {},
   "outputs": [],
   "source": [
    "column_summary_frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "9cd0f09b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uncomment to write the column summary to a CSV file in the working directory:\n",
    "# column_summary_frame.to_csv('column_summary_frame.csv', header=True, sep=',', index=True, index_label='column')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a10e2071",
   "metadata": {},
   "source": [
    "## ColumnMissingValuesMetric to CSV through a pandas DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "d5cd5c1c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report made of ColumnMissingValuesMetric, requested for columns='all',\n",
    "# computed for `current` with `reference` as the comparison dataset.\n",
    "missing_values = Report(\n",
    "    metrics=[generate_column_metrics(ColumnMissingValuesMetric, columns='all')]\n",
    ")\n",
    "\n",
    "missing_values.run(\n",
    "    reference_data=reference,\n",
    "    current_data=current,\n",
    "    column_mapping=columns,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "a9eff907",
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_values_dict = missing_values.as_dict()\n",
    "\n",
    "# One row per column name; the 'reference' and 'current' sections of each\n",
    "# result are merged into a single mapping with ref_ / cur_ key prefixes.\n",
    "data = {}\n",
    "for result in missing_values_dict['metrics']:\n",
    "    data[result['result']['column_name']] = {\n",
    "        f\"{prefix}_{key}\": val\n",
    "        for prefix, section in (('ref', 'reference'), ('cur', 'current'))\n",
    "        for key, val in result['result'][section].items()\n",
    "    }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "1cf01b2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_values_frame = pd.DataFrame.from_dict(data, orient='index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b371398",
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_values_frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "ab3bed24",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the frame to 'missing_values_frame.csv' in the working directory.\n",
    "# `header` takes a bool (or a list of column aliases); the string 'True' was only\n",
    "# accepted because a non-empty string is truthy, so 'False' would not have disabled it.\n",
    "missing_values_frame.to_csv(\n",
    "    'missing_values_frame.csv',\n",
    "    header=True,\n",
    "    sep=',',\n",
    "    index=True,\n",
    "    index_label='column',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3ee41ff6",
   "metadata": {},
   "source": [
    "## Several column-based metrics in CSV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "ae7b56e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Join the two per-column frames on their shared index (the column names).\n",
    "# Both frames use ref_/cur_ prefixed columns, so a field reported by both metrics\n",
    "# under the same name (presumably e.g. a row count - verify) would otherwise get\n",
    "# pandas' default '_x'/'_y' suffixes; name the origin explicitly instead.\n",
    "column_metrics_frame = pd.merge(\n",
    "    column_summary_frame,\n",
    "    missing_values_frame,\n",
    "    left_index=True,\n",
    "    right_index=True,\n",
    "    suffixes=('_summary', '_missing'),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ecaaafb1",
   "metadata": {},
   "outputs": [],
   "source": [
    "column_metrics_frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "011d1c16",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uncomment to write the combined column metrics to a CSV file in the working directory:\n",
    "# column_metrics_frame.to_csv('column_metrics_frame.csv', header=True, sep=',', index=True, index_label='column')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "16f9f892",
   "metadata": {},
   "source": [
    "## Test results in CSV format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "4cf71597",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Instantiate each dataset-level test with its default arguments, in this order.\n",
    "dataset_tests = TestSuite(tests=[\n",
    "    test_cls()\n",
    "    for test_cls in (\n",
    "        TestNumberOfColumns,\n",
    "        TestColumnsType,\n",
    "        TestNumberOfEmptyRows,\n",
    "        TestNumberOfEmptyColumns,\n",
    "        TestNumberOfDuplicatedRows,\n",
    "        TestNumberOfDuplicatedColumns,\n",
    "    )\n",
    "])\n",
    "\n",
    "dataset_tests.run(\n",
    "    reference_data=reference,\n",
    "    current_data=current,\n",
    "    column_mapping=columns,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "2ec1f0e8",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_tests_dict = dataset_tests.as_dict()\n",
    "\n",
    "# Collect one record per entry in 'tests', keeping only its name, group and status.\n",
    "data = []\n",
    "for result in dataset_tests_dict['tests']:\n",
    "    data.append(dict(\n",
    "        test=result['name'],\n",
    "        group=result['group'],\n",
    "        status=result['status'],\n",
    "    ))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "efab3029",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_tests_frame = pd.DataFrame.from_records(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "668c52b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_tests_frame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ce996940",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Uncomment to write the test results to a CSV file in the working directory:\n",
    "# dataset_tests_frame.to_csv('dataset_tests_frame.csv', header=True, sep=',', index=True, index_label='index')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
